module net.BurtonRadons.dedit.highlight.d;

import net.BurtonRadons.dedit.main;
import std.path;
import std.string;
import std.ctype;

alias std.ctype.isdigit isdigit;

/** The D highlighter. */
class D_Highlighter : SyntaxHighlighter
{
    /** The words highlighted as reserved outside of asm blocks.  The
      * constructor turns this list into the reserved lookup set by
      * passing it to keywordDict.
      */
    char [] [] reservedBase = 
    [
        "abstract", "alias", "align", "asm", "assert", "auto", "bit", "body",
        "break", "byte", "case", "cast", "catch", "cent", "char", "class",
        "complex", "const", "continue", "debug", "default", "delegate",
        "delete", "deprecated", "do", "double", "else", "enum",
        "export", "extended", "extern", "false", "final", "finally", "float",
        "for", "function", "goto", "if", "imaginary", "import", "in", "inout",
        "instance", "int", "interface", "invariant", "long", "module",
        "new", "null", "out", "override", "private", "protected",
        "public", "real", "return", "short", "static", "struct", "super",
        "switch", "synchronized", "template", "this", "throw", "true",
        "try", "typedef",  "ubyte", "ucent", "uint", "ulong", "union",
        "unittest", "ushort", "version", "void", "volatile", "wchar", 
        "while", "with", "cfloat", "cdouble", "creal", "ifloat", "idouble",
        "ireal",
    ];

    /* Word lists used while highlighting the inside of an asm block.
     * They are only populated when compiling for X86; on any other
     * target both lists are left empty, so every word inside an asm
     * block is highlighted as a plain identifier.
     *
     * Entries are compared against the tokens returned by getKeyword,
     * so an entry must be exactly one identifier with no surrounding
     * whitespace, otherwise it can never match.
     */
    version (X86)
    {
        /** Register names, highlighted as asm specials.
          * NOTE(review): the "ST(n)" entries contain parentheses, which
          * getKeyword never returns as part of a token - confirm whether
          * they are meant to match.
          */
        char [] [] asmSpecialBase =
        [
            "AL", "AH", "AX", "EAX", "BL", "BH", "BX", "EBX",
            "CL", "CH", "CX", "ECX", "DL", "DH", "DX", "EDX",
            "BP", "EBP", "SP", "ESP", "DI", "EDI",  "SI", "ESI",
            "ES", "CS", "SS", "DS", "GS", "FS",
            "CR0", "CR2", "CR3", "CR4",
            "DR0", "DR1", "DR2", "DR3", "DR6", "DR7",
            "TR3", "TR4", "TR5", "TR6", "TR7",
            "ST", "ST(0)", "ST(1)", "ST(2)", "ST(3)", "ST(4)", "ST(5)", "ST(6)", "ST(7)",
            "MM0", "MM1", "MM2", "MM3", "MM4", "MM5", "MM6", "MM7",
        ];

        /** Assembler directives and opcodes, highlighted as reserved
          * words inside an asm block.  Some words appear more than once;
          * that is harmless because the list is folded into a set.
          */
        char [] [] asmReservedBase =
        [
        /* DLI's inline assembler will use the same syntax as DMD's */
            "__LOCAL_SIZE", "align", "byte", "call", "db", "dd", "de", "df",
            "di", "dl", "double", "ds", "dword", "even", "extended", "far",
            "float", "int", "lock", "mov", "naked", "near", "offset", "pc",
            "pop", "ptr", "rep", "repe", "repne", "repnz", "repz", "seg",
            "short", "word",

            "aaa", "aad", "aam", "aas", "adc", "add", "addpd", "addps", "addsd",
            "addss", "and", "andnpd", "andnps", "andpd", "andps", "arpl",
            "bound", "bsf", "bsr", "bswap", "bt", "btc", "btr", "bts", "call",
            "cbw", "cdq", "clc", "cld", "clflush", "cli", "clts", "cmc", "cmova",
            "cmovae", "cmovb", "cmovbe", "cmovc", "cmove", "cmovg", "cmovge",
            "cmovl", "cmovle", "cmovna", "cmovnae", "cmovnb", "cmovnbe", "cmovnc",
            "cmovne", "cmovng", "cmovnge", "cmovnl", "cmovnle", "cmovno",
            "cmovnp", "cmovns", "cmovnz", "cmovo", "cmovp", "cmovpe", "cmovpo",
            "cmovs", "cmovz", "cmp", "cmppd", "cmpps", "cmps", "cmpsb", "cmpsd",
            "cmpss", "cmpsw", "cmpxch8b", "cmpxchg", "comisd", "comiss", "cpuid",
            "cvtdq2pd", "cvtdq2ps", "cvtpd2dq", "cvtpd2pi", "cvtpd2ps",
            "cvtpi2pd", "cvtpi2ps", "cvtps2dq", "cvtps2pd", "cvtps2pi",
            "cvtsd2si", "cvtsd2ss", "cvtsi2sd", "cvtsi2ss", "cvtss2sd",
            "cvtss2si", "cvttpd2dq", "cvttpd2pi", "cvttps2dq", "cvttps2pi",
            "cvttsd2si", "cvttss2si", "cwd", "cwde", "da", "daa", "das", "db",
            "dd", "de", "dec", "df", "di", "div", "divpd", "divps", "divsd",
            "divss", "dl", "dq", "ds", "dt", "dw", "emms", "enter", "f2xm1",
            "fabs", "fadd", "faddp", "fbld", "fbstp", "fchs", "fclex", "fcmovb",
            "fcmovbe", "fcmove", "fcmovnb", "fcmovnbe", "fcmovne", "fcmovnu",
            "fcmovu", "fcom", "fcomi", "fcomip", "fcomp", "fcompp", "fcos",
            "fdecstp", "fdiv", "fdivp", "fdivr", "fdivrp", "ffree", "fiadd",
            "ficom", "ficomp", "fidiv", "fidivr", "fild", "fimul", "fincstp",
            "finit", "fist", "fistp", "fisub", "fisubr", "fld", "fld1", "fldcw",
            "fldenv", "fldl2e", "fldl2t", "fldlg2", "fldln2", "fldpi", "fldz",
            "fmul", "fmulp", "fnclex", "fninit", "fnop", "fnsave", "fnstcw",
            "fnstenv", "fnstsw", "fpatan", "fprem", "fprem1", "fptan", "frndint",
            "frstor", "fsave", "fscale", "fsetpm", "fsin", "fsincos", "fsqrt",
            "fst", "fstcw", "fstenv", "fstp", "fstsw", "fsub", "fsubp", "fsubr",
            "fsubrp", "ftst", "fucom", "fucomi", "fucomip", "fucomp", "fucompp",
            "fwait", "fxam", "fxch", "fxrstor", "fxsave", "fxtract", "fyl2x",
            "fyl2xp1", "hlt", "idiv", "imul", "in", "inc", "ins", "insb", "insd",
            "insw", "int", "into", "invd", "invlpg", "iret", "iretd", "ja", "jae",
            "jb", "jbe", "jc", "jcxz", "je", "jecxz", "jg", "jge", "jl", "jle",
            "jmp", "jna", "jnae", "jnb", "jnbe", "jnc", "jne", "jng", "jnge",
            "jnl", "jnle", "jno", "jnp", "jns", "jnz", "jo", "jp", "jpe", "jpo",
            "js", "jz", "lahf", "lar", "ldmxcsr", "lds", "lea", "leave", "les",
            "lfence", "lfs", "lgdt", "lgs", "lidt", "lldt", "lmsw", "lock",
            "lods", "lodsb", "lodsd", "lodsw", "loop", "loope", "loopne",
            "loopnz", "loopz", "lsl", "lss", "ltr", "maskmovdqu", "maskmovq",
            "maxpd", "maxps", "maxsd", "maxss", "mfence", "minpd", "minps",
            "minsd", "minss", "mov", "movapd", "movaps", "movd", "movdq2q",
            "movdqa", "movdqu", "movhlps", "movhpd", "movhps", "movlhps",
            "movlpd", "movlps", "movmskpd", "movmskps", "movntdq", "movnti",
            "movntpd", "movntps", "movntq", "movq", "movq2dq", "movs", "movsb",
            "movsd", "movss", "movsw", "movsx", "movupd", "movups", "movzx",
            "mul", "mulpd", "mulps", "mulsd", "mulss", "neg", "nop", "not",
            "or", "orpd", "orps", "out", "outs", "outsb", "outsd", "outsw",
            "packssdw", "packsswb", "packuswb", "paddb", "paddd", "paddq",
            "paddsb", "paddsw", "paddusb", "paddusw", "paddw", "pand", "pandn",
            "pavgb", "pavgw", "pcmpeqb", "pcmpeqd", "pcmpeqw", "pcmpgtb",
            "pcmpgtd", "pcmpgtw", "pextrw", "pinsrw", "pmaddwd", "pmaxsw",
            "pmaxub", "pminsw", "pminub", "pmovmskb", "pmulhuw", "pmulhw",
            "pmullw", "pmuludq", "pop", "popa", "popad", "popf", "popfd", "por",
            "prefetchnta", "prefetcht0", "prefetcht1", "prefetcht2", "psadbw",
            "pshufd", "pshufhw", "pshuflw", "pshufw", "pslld", "pslldq", "psllq",
            "psllw", "psrad", "psraw", "psrld", "psrldq", "psrlq", "psrlw",
            "psubb", "psubd", "psubq", "psubsb", "psubsw", "psubusb", "psubusw",
            "psubw", "punpckhbw", "punpckhdq", "punpckhqdq", "punpckhwd",
            "punpcklbw", "punpckldq", "punpcklqdq", "punpcklwd", "push", "pusha",
            "pushad", "pushf", "pushfd", "pxor", "rcl", "rcpps", "rcpss", "rcr",
            "rdmsr", "rdpmc", "rdtsc", "rep", "repe", "repne", "repnz", "repz",
            "ret", "retf", "rol", "ror", "rsm", "rsqrtps", "rsqrtss", "sahf",
            "sal", "sar", "sbb", "scas", "scasb", "scasd", "scasw", "seta",
            "setae", "setb", "setbe", "setc", "sete", "setg", "setge", "setl",
            "setle", "setna", "setnae", "setnb", "setnbe", "setnc", "setne",
            "setng", "setnge", "setnl", "setnle", "setno", "setnp", "setns",
            "setnz", "seto", "setp", "setpe", "setpo", "sets", "setz", "sfence",
            "sgdt", "shl", "shld", "shr", "shrd", "shufpd", "shufps", "sidt",
            "sldt", "smsw", "sqrtpd", "sqrtps", "sqrtsd", "sqrtss", "stc", "std",
            "sti", "stmxcsr", "stos", "stosb", "stosd", "stosw", "str", "sub",
            "subpd", "subps", "subsd", "subss", "sysenter", "sysexit", "test",
            "ucomisd", "ucomiss", "ud2", "unpckhpd", "unpckhps", "unpcklpd",
            "unpcklps", "verr", "verw", "wait", "wbinvd", "wrmsr", "xadd",
            "xchg", "xlat", "xlatb", "xor", "xorpd", "xorps",

            "pavgusb", "pf2id", "pfacc", "pfadd", "pfcmpeq", "pfcmpge", "pfcmpgt",
            "pfmax", "pfmin", "pfmul", "pfnacc", "pfpnacc", "pfrcp", "pfrcpit1",
            "pfrcpit2", "pfrsqit1", "pfrsqrt", "pfsub", "pfsubr", "pi2fd",
            "pmulhrw", "pswapd",
        ];
    }
    else
    {
        char [] [] asmReservedBase;
        char [] [] asmSpecialBase;
    }

    /* Lookup sets built by the constructor from the *Base word lists;
     * highlight tests membership in them with the "in" operator.
     */
    bit [char []] reserved;     /* reserved words used outside asm blocks */
    bit [char []] asmReserved;  /* directives and opcodes used inside asm blocks */
    bit [char []] asmSpecial;   /* register names used inside asm blocks */

    /** The characters isSymbol reports as symbols. */
    const char [] symbols = "()[]<>{}:;=!%^&*-+|/.~,$";

    /** Build a lookup set from a list of words.
      *
      * @param base The words to put in the set; may be empty.
      * @return An associative array mapping every word of base to true.
      */
    bit [char []] keywordDict (char [] [] base)
    {
        bit [char []] table;
        int index = 0;

        while (index < base.length)
        {
            char [] word = base [index];

            table [word] = true;
            index ++;
        }

        return table;
    }

    /** Build the three lookup sets from their word lists. */
    this ()
    {
        /* The three sets are independent of one another. */
        asmSpecial = keywordDict (asmSpecialBase);
        asmReserved = keywordDict (asmReservedBase);
        reserved = keywordDict (reservedBase);
    }

    /** Module start-up hook: append one instance of this highlighter
      * to list.
      * NOTE(review): list is declared outside this file, presumably the
      * registry of available highlighters - confirm.
      */
    static this ()
    {
        D_Highlighter instance_ = new D_Highlighter ();

        list ~= instance_;
    }

    /** @return The name of this highlighter, "D". */
    override char [] name ()
    {
        return "D";
    }
    /** @return The filename pattern of the files handled here, "*.d". */
    override char [] exts ()
    {
        return "*.d";
    }
    
    /** Work out the indentation change implied by one line of a document.
      *
      * The first word of the line (or a leading '{') picks a preliminary
      * value, which is cancelled when the line completes the construct
      * itself: a "case" followed by "break" or by "return ...;", or any
      * other line on which a '}' brings the brace depth back to zero.
      *
      * NOTE(review): how the caller applies the result is not visible
      * in this file.
      *
      * @param document The document holding the line.
      * @param index Index into document.lines of the line to examine.
      * @return 1, -1 or 0.
      */
    override int indent (Document document, int index)
    {
        char [] line = document.lines [index];

        if (line.length == 0)
            return 0;

        /* Find the first and the last non-blank character. */
        int start = 0;
        while (start < line.length && std.string.iswhite (line [start]))
            start ++;

        int end = line.length - 1;
        while (end > 0 && std.string.iswhite (line [end]))
            end --;

        /* Only true for a line made up entirely of whitespace. */
        if (end < start)
            return 0;

        char [] keyword = getKeyword (&line [start], line.length - start);
        int dent = 0;

        /* Get a preliminary indentation. */
        if (line [start] == '{')
            dent = 1;

        switch (keyword)
        {
            case "break":
            case "continue":
            case "return":
                dent = -1;
                break;

            case "asm":
            case "case":
            case "catch":
            case "class":
            case "default":
            case "do":
            case "else":
            case "enum":
            case "finally":
            case "for":
            case "if":
            case "interface":
            case "in":
            case "out":
            case "struct":
            case "switch":
            case "union":
            case "while":
            case "with":
                dent = 1;
                break;

            default:
                break;
        }

        if (keyword == "case")
        {
            /* A single-line case is cancelled by a break, or by a
             * return statement that is terminated on the same line.
             */
            int pos = start + 4;

            while (pos < line.length - 1)
            {
                char [] word = getKeyword (&line [pos + 1], line.length - pos - 1);

                if (word == "break")
                    return 0;

                if (word == "return")
                {
                    for (pos += word.length; pos < line.length; pos ++)
                        if (line [pos] == ';')
                            return 0;
                }

                /* A null word has length zero, so this always advances. */
                pos += word.length + 1;
            }
        }
        else
        {
            /* Anything else is cancelled by a closing brace that brings
             * the brace depth on this line back to zero.
             */
            int depth = 0;

            for (int pos = start + keyword.length; pos < line.length; pos ++)
            {
                char ch = line [pos];

                if (ch == '{')
                    depth ++;
                else if (ch == '}')
                {
                    depth --;
                    if (depth == 0)
                        return 0;
                }
            }
        }

        return dent;
    }

    /** Rate how well this highlighter suits a file.
      *
      * @param filename Name of the file; only its extension is examined.
      * @param data Not consulted.
      * @return 1 when the extension is "d" in any letter case, else 0.
      */
    override float match (char [] filename, char [] [] data)
    {
        char [] extension = std.path.getExt (filename);

        /* icmp yields zero when the strings are equal ignoring case. */
        return icmp (extension, "d") == 0 ? 1 : 0;
    }

    /** @return true when f is one of the characters in symbols. */
    final bit isSymbol (char f)
    {
        int index = 0;

        while (index < symbols.length)
        {
            if (symbols [index] == f)
                return true;
            index ++;
        }

        return false;
    }

    /** @return true when f may begin an identifier: a letter or '_'. */
    final bit isIdentifierStart (char f)
    {
        if (f == '_')
            return true;

        return isalpha (f) != 0;
    }
    /** @return true when f may continue an identifier: a letter, a
      * digit or '_'.
      */
    final bit isIdentifierMiddle (char f)
    {
        if (f == '_')
            return true;

        return isalnum (f) != 0;
    }

    /** Extract the identifier that begins at c.
      *
      * @param c Pointer to the first character to examine.
      * @param n Number of characters available at c.
      * @return A slice over the longest run of identifier characters
      *         starting at c, or null when n is zero or the first
      *         character cannot begin an identifier.  The slice refers
      *         to the caller's memory; nothing is copied.
      */
    final char [] getKeyword (char *c, int n)
    {
        if (n == 0 || !isIdentifierStart (c [0]))
            return null;

        int len = 1;

        while (len < n && isIdentifierMiddle (c [len]))
            len ++;

        return c [0 .. len];
    }
    
    /** Highlighter state carried from the end of one line to the start
      * of the next; highlight reads it through lastp and writes it
      * through nextp.
      */
    struct LineInfo
    {
        char code; /**< Highlight code in effect at the end of the line. */
        char open;
            /**< Current open type:
               * <ul>
               * <li>'*' - multiline comment.
               * <li>'"' - double-quoted string.
               * <li>"'" - single-quoted string.
               * <li>'`' - grave accent-quoted string.
               * <li>'r' - raw string.
               * <li>'i' - identifier.
               * <li>'/' - single-line comment.
               * <li>'#' - number.
               * <li>'+' - nested comment.
               * </ul>
               */
        char mode;
            /**< Current master mode:
               * <ul>
               * <li>'A' - asm statement.
               * <li>'\0' - normal.
               * </ul>
               */
        int depth; /**< Nested comment depth. */
    }

    /** The per-line state is one LineInfo: the highlight code, the open
      * type, the master mode and the nested comment depth.
      *
      * @return The size of LineInfo.
      */
    override int extraSize ()
    {
        return LineInfo.size;
    }

    /** Highlight one line of D source.
      *
      * One highlight code is written to high for every character of
      * line:
      * <ul>
      * <li>'*' - comment of any kind.
      * <li>'"' - string or character literal.
      * <li>'#' - number.
      * <li>'s' - symbol.
      * <li>'r' - reserved word.
      * <li>'m' - asm special (register), inside asm blocks only.
      * <li>'i' - any other identifier.
      * <li>'\0' - everything else.
      * </ul>
      *
      * @param line The text of the line.
      * @param high Receives the codes.  It is written through a raw
      *        pointer without bounds checking, so it must hold at least
      *        line.length entries.
      * @param lastp Points to the LineInfo produced for the previous
      *        line, or is null to start from the normal state.
      * @param nextp Points to the LineInfo that receives the state at
      *        the end of this line; nothing is stored when it is null.
      */
    override void highlight (char [] line, char [] high, void *lastp, void *nextp)
    {
        LineInfo *last = (LineInfo *) lastp;
        LineInfo *next = (LineInfo *) nextp;
        char *c, h, e;

        /* Start from an explicit normal state instead of depending on
         * the default initialiser of char when there is no previous
         * line.
         */
        char code = 0;
        char open = 0;
        char mode = 0;
        int depth = 0;

        if (last !== null)
        {
            code = last.code;
            open = last.open;
            mode = last.mode;
            depth = last.depth;
        }

        c = line;
        h = high;
        e = c + line.length;

        while (c < e)
        {
            int n = (int) (e - c); /* characters left, always >= 1 here */
            char f = *c;
            char [] r;

        restart:
            if (open == '*')
            {
                if (n > 1 && c [0] == '*' && c [1] == '/')
                {
                    *h ++ = code;
                    *h ++ = code;
                    code = 0;
                    open = 0;
                    c += 2;
                }
                else
                    goto def;
            }
            else if (open == '\"' || open == '\'')
            {
                if (c [0] == '\\')
                {
                    /* Colour the backslash, then colour the escaped
                     * character without inspecting it so that an escaped
                     * quote does not close the literal.
                     */
                    *h ++ = code;
                    c ++;
                    if (c < e)
                        goto def;
                    goto dun;
                }
                else if (c [0] == open)
                {
                    *h ++ = code;
                    code = open = 0;
                    c ++;
                }
                else
                    goto def;
            }
            else if (open == '`')
            {
                if (c [0] == '`')
                {
                    *h ++ = code;
                    code = open = 0;
                    c ++;
                }
                else
                    goto def;
            }
            else if (open == 'r')
            {
                if (c [0] == '\"')
                {
                    *h ++ = code;
                    code = open = 0;
                    c ++;
                }
                else
                    goto def;
            }
            else if (open == 'i')
            {
                /* Nothing below sets this state; it is only honoured
                 * when it arrives through lastp.
                 */
                if (isalnum (f) || f == '_')
                    goto def;
                open = code = 0;
                goto restart;
            }
            else if (open == '/')
                goto def;
            else if (open == '#')
            {
                /* Digits, the radix prefix, the decimal point, hex
                 * digits (which include the exponent letter), the
                 * suffixes l, L and F, and the digit separator.
                 */
                if (isdigit (f) || f == '.' || f == '_'
                 || f == 'x' || f == 'X' || f == 'l' || f == 'L'
                 || (f >= 'a' && f <= 'f') || (f >= 'A' && f <= 'F'))
                    goto def;
                else
                {
                    code = open = 0;
                    goto restart;
                }
            }
            else if (open == '+')
            {
                if (n > 1 && c [0] == '/' && c [1] == '+')
                {
                    depth += 1;
                    *h ++ = code;
                    *h ++ = code;
                    c += 2;
                }
                else if (n > 1 && c [0] == '+' && c [1] == '/')
                {
                    depth -= 1;
                    *h ++ = code;
                    *h ++ = code;
                    c += 2;
                    /* Drop the comment colour together with the open
                     * state, as the closing of a block comment does.
                     */
                    if (depth == 0)
                        code = open = 0;
                }
                else
                    goto def;
            }
        /* open == 0 from here on */
            else if (n > 1 && c [0] == '/' && c [1] == '*')
            {
                open = code = '*';
                *h ++ = code;
                *h ++ = code;
                c += 2;
            }
            else if (n > 1 && c [0] == '/' && c [1] == '+')
            {
                open = '+', code = '*';
                *h ++ = code;
                *h ++ = code;
                depth = 1;
                c += 2;
            }
            else if (f == '\"')
            {
                open = code = '\"';
                goto def;
            }
            else if (f == '\'')
            {
                open = '\'';
                code = '\"';
                goto def;
            }
            else if (f == '`')
            {
                open = '`';
                code = '"';
                goto def;
            }
            else if (f == 'r' && n > 1 && c [1] == '"')
            {
                /* Consume the prefix and the opening quote together;
                 * otherwise the opening quote would be taken for the
                 * closing one on the next pass.
                 */
                open = 'r';
                code = '"';
                *h ++ = code;
                *h ++ = code;
                c += 2;
            }
            else if (n > 1 && c [0] == '/' && c [1] == '/')
            {
                open = '/';
                code = '*';
                goto def;
            }
            else if (isdigit (f) || (f == '.' && n > 1 && isdigit (c [1])))
            {
                /* A '.' only starts a number when a digit follows it. */
                open = code = '#';
                goto def;
            }
            else if (mode == 'A')
            {
                if (isSymbol (f))
                {
                    *h ++ = 's', c ++;
                    /* The first closing brace ends the asm block. */
                    if (f == '}')
                        mode = 0;
                }
                else if ((r = getKeyword (c, n)) !== null)
                {
                    h [0 .. r.length] = (r in asmReserved) ? (char) 'r'
                                      : (r in asmSpecial)  ? (char) 'm'
                                      : (char) 'i';
                    c += r.length;
                    h += r.length;
                }
                else
                    goto def;
            }
            else
            {
                if (isSymbol (f))
                {
                    *h ++ = 's';
                    c ++;
                }
                else if ((r = getKeyword (c, n)) !== null)
                {
                    h [0 .. r.length] = (r in reserved) ? 'r' : 'i';
                    c += r.length;
                    h += r.length;
                    if (r == "asm")
                        mode = 'A';
                }
                else
                {
                    *h ++ = '\0';
                    c ++;
                }
            }

        dun:
            continue;
        def:
            /* Colour one character with the current code. */
            *h ++ = code;
            c += 1;
        }

        /* Identifiers, single-line comments and numbers never continue
         * on the next line, so neither does their colour.
         */
        if (open == 'i' || open == '/' || open == '#')
            code = open = 0;

        if (next !== null)
        {
            next.code = code;
            next.open = open;
            next.mode = mode;
            next.depth = depth;
        }
    }
}

